From 91006d5acb030cd3b35730c710452efd1aefe029 Mon Sep 17 00:00:00 2001
From: Tim Starling <tstarling@users.mediawiki.org>
Date: Sun, 6 Aug 2006 14:01:47 +0000
Subject: [PATCH] Optimised Sanitizer::removeHTMLtags, Parser::unstrip,
 Parser::doMagicLinks, Parser::replaceInternalLinks and
 Parser::replace_callback. Introduced fast abridged entry point
 Parser::recursiveTagParse, for recursive parsing called from Cite.php and
 similar extensions. Resolved a known fail parser test.

---
 includes/Parser.php         | 611 +++++++++++++++++-------------------
 includes/Sanitizer.php      | 124 ++++----
 maintenance/parserTests.txt |   2 +-
 3 files changed, 352 insertions(+), 385 deletions(-)

diff --git a/includes/Parser.php b/includes/Parser.php
index dd8415469d..a13a0ac998 100644
--- a/includes/Parser.php
+++ b/includes/Parser.php
@@ -187,6 +187,7 @@ class Parser
 	 * @private
 	 */
 	function clearState() {
+		wfProfileIn( __METHOD__ );
 		if ( $this->mFirstCall ) {
 			$this->firstCallInit();
 		}
@@ -228,6 +229,7 @@ class Parser
 		$this->mForceTocPosition = false;
 
 		wfRunHooks( 'ParserClearState', array( &$this ) );
+		wfProfileOut( __METHOD__ );
 	}
 
 	/**
@@ -235,7 +237,7 @@ class Parser
 	 *
 	 * @public
 	 */
-	function UniqPrefix() {
+	function uniqPrefix() {
 		return $this->mUniqPrefix;
 	}
 
@@ -282,12 +284,6 @@ class Parser
 		$text = $this->strip( $text, $x );
 		wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$x ) );
 
-		# Hook to suspend the parser in this state
-		if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$x ) ) ) {
-			wfProfileOut( $fname );
-			return $text ;
-		}
-
 		$text = $this->internalParse( $text );
 
 		$text = $this->unstrip( $text, $this->mStripState );
@@ -358,6 +354,21 @@ class Parser
 		return $this->mOutput;
 	}
 
+	/**
+	 * Recursive parser entry point that can be called from an extension tag
+	 * hook.
+	 */
+	function recursiveTagParse( $text ) {
+		wfProfileIn( __METHOD__ );
+		$x =& $this->mStripState;
+		wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$x ) );
+		$text = $this->strip( $text, $x );
+		wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$x ) );
+		$text = $this->internalParse( $text );
+		wfProfileOut( __METHOD__ );
+		return $text;
+	}
+
 	/**
 	 * Get a random string
 	 *
@@ -395,8 +406,7 @@ class Parser
 	 * @static
 	 */
 	function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){
-		$rand = Parser::getRandomString();
-		$n = 1;
+		static $n = 1;
 		$stripped = '';
 		$matches = array();
 
@@ -423,7 +433,7 @@ class Parser
 				$inside     = $p[4];
 			}
 
-			$marker = "$uniq_prefix-$element-$rand" . sprintf('%08X', $n++) . '-QINU';
+			$marker = "$uniq_prefix-$element-" . sprintf('%08X', $n++) . '-QINU';
 			$stripped .= $marker;
 
 			if ( $close === '/>' ) {
@@ -474,6 +484,7 @@ class Parser
 	 * @private
 	 */
 	function strip( $text, &$state, $stripcomments = false , $dontstrip = array () ) {
+		wfProfileIn( __METHOD__ );
 		$render = ($this->mOutputType == OT_HTML);
 
 		# Replace any instances of the placeholders
@@ -505,6 +516,7 @@ class Parser
 			list( $element, $content, $params, $tag ) = $data;
 			if( $render ) {
 				$tagName = strtolower( $element );
+				wfProfileIn( __METHOD__."-render-$tagName" );
 				switch( $tagName ) {
 				case '!--':
 					// Comment
@@ -539,14 +551,22 @@ class Parser
 						throw new MWException( "Invalid call hook $element" );
 					}
 				}
+				wfProfileOut( __METHOD__."-render-$tagName" );
 			} else {
 				// Just stripping tags; keep the source
 				$output = $tag;
 			}
+
+			// Unstrip the output, because unstrip() is no longer recursive so 
+			// it won't do it itself
+			$output = $this->unstrip( $output, $state );
+
 			if( !$stripcomments && $element == '!--' ) {
 				$commentState[$marker] = $output;
+			} elseif ( $element == 'html' || $element == 'nowiki' ) {
+				$state['nowiki'][$marker] = $output;
 			} else {
-				$state[$element][$marker] = $output;
+				$state['general'][$marker] = $output;
 			}
 		}
 
@@ -559,6 +579,7 @@ class Parser
 			$text = strtr( $text, $commentState );
 		}
 
+		wfProfileOut( __METHOD__ );
 		return $text;
 	}
 
@@ -569,20 +590,14 @@ class Parser
 	 * @private
 	 */
 	function unstrip( $text, &$state ) {
-		if ( !is_array( $state ) ) {
+		if ( !isset( $state['general'] ) ) {
 			return $text;
 		}
 
-		$replacements = array();
-		foreach( $state as $tag => $contentDict ) {
-			if( $tag != 'nowiki' && $tag != 'html' ) {
-				foreach( $contentDict as $uniq => $content ) {
-					$replacements[$uniq] = $content;
-				}
-			}
-		}
-		$text = strtr( $text, $replacements );
-
+		wfProfileIn( __METHOD__ );
+		# TODO: good candidate for FSS
+		$text = strtr( $text, $state['general'] );
+		wfProfileOut( __METHOD__ );
 		return $text;
 	}
 
@@ -592,20 +607,15 @@ class Parser
 	 * @private
 	 */
 	function unstripNoWiki( $text, &$state ) {
-		if ( !is_array( $state ) ) {
+		if ( !isset( $state['nowiki'] ) ) {
 			return $text;
 		}
 
-		$replacements = array();
-		foreach( $state as $tag => $contentDict ) {
-			if( $tag == 'nowiki' || $tag == 'html' ) {
-				foreach( $contentDict as $uniq => $content ) {
-					$replacements[$uniq] = $content;
-				}
-			}
-		}
-		$text = strtr( $text, $replacements );
-
+		wfProfileIn( __METHOD__ );
+		# TODO: good candidate for FSS
+		$text = strtr( $text, $state['nowiki'] );
+		wfProfileOut( __METHOD__ );
+		
 		return $text;
 	}
 
@@ -621,7 +631,7 @@ class Parser
 		if ( !$state ) {
 			$state = array();
 		}
-		$state['item'][$rnd] = $text;
+		$state['general'][$rnd] = $text;
 		return $rnd;
 	}
 
@@ -881,6 +891,12 @@ class Parser
 		$fname = 'Parser::internalParse';
 		wfProfileIn( $fname );
 
+		# Hook to suspend the parser in this state
+		if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) ) ) {
+			wfProfileOut( $fname );
+			return $text ;
+		}
+
 		# Remove <noinclude> tags and <includeonly> sections
 		$text = strtr( $text, array( '<onlyinclude>' => '' , '</onlyinclude>' => '' ) );
 		$text = strtr( $text, array( '<noinclude>' => '', '</noinclude>' => '') );
@@ -927,9 +943,52 @@ class Parser
 	 * @private
 	 */
 	function &doMagicLinks( &$text ) {
-		$text = $this->magicISBN( $text );
-		$text = $this->magicRFC( $text, 'RFC ', 'rfcurl' );
-		$text = $this->magicRFC( $text, 'PMID ', 'pubmedurl' );
+		wfProfileIn( __METHOD__ );
+		$text = preg_replace_callback( 
+			'!(?:                           # Start cases
+			    <a.*?</a> |                 # Skip link text
+			    <.*?> |                     # Skip stuff inside HTML elements
+			    (?:RFC|PMID)\s+([0-9]+) |   # RFC or PMID, capture number as m[1]
+			    ISBN\s+(-*[0-9Xx][0-9Xx-]*) # ISBN, capture number as m[2]
+			)!x', array( &$this, 'magicLinkCallback' ), $text );
+		wfProfileOut( __METHOD__ );
+		return $text; 
+	}
+
+	function magicLinkCallback( $m ) {
+		if ( substr( $m[0], 0, 1 ) == '<' ) {
+			# Skip HTML element
+			return $m[0];
+		} elseif ( substr( $m[0], 0, 4 ) == 'ISBN' ) {
+			$isbn = $m[2];
+			$num = strtr( $isbn, array( 
+				'-' => '',
+				' ' => '',
+				'x' => 'X',
+			));
+			$titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
+			$text = '<a href="' .
+				$titleObj->escapeLocalUrl( "isbn=$num" ) .
+				"\" class=\"internal\">ISBN $isbn</a>";
+		} else {
+			if ( substr( $m[0], 0, 3 ) == 'RFC' ) {
+				$keyword = 'RFC';
+				$urlmsg = 'rfcurl';
+				$id = $m[1];
+			} elseif ( substr( $m[0], 0, 4 ) == 'PMID' ) {
+				$keyword = 'PMID';
+				$urlmsg = 'pubmedurl';
+				$id = $m[1];
+			} else {
+				throw new MWException( __METHOD__.': unrecognised match type "' . 
+					substr($m[0], 0, 20 ) . '"' );
+			}
+			# Build the external link; its title text mirrors the visible "KEYWORD id" label
+			$url = wfMsg( $urlmsg, $id);
+			$sk =& $this->mOptions->getSkin();
+			$la = $sk->getExternalLinkAttributes( $url, "$keyword $id" );
+			$text = "<a href=\"{$url}\"{$la}>{$keyword} {$id}</a>";
+		}
 		return $text;
 	}
 
@@ -1391,7 +1450,7 @@ class Parser
 		$useLinkPrefixExtension = $wgContLang->linkPrefixExtension();
 
 		if( is_null( $this->mTitle ) ) {
-			throw new MWException( 'nooo' );
+			throw new MWException( __METHOD__.": \$this->mTitle is null\n" );
 		}
 		$nottalk = !$this->mTitle->isTalkPage();
 
@@ -1406,10 +1465,9 @@ class Parser
 		}
 
 		$selflink = $this->mTitle->getPrefixedText();
-		wfProfileOut( $fname.'-setup' );
-
 		$checkVariantLink = sizeof($wgContLang->getVariants())>1;
 		$useSubpages = $this->areSubpagesAllowed();
+		wfProfileOut( $fname.'-setup' );
 
 		# Loop for each link
 		for ($k = 0; isset( $a[$k] ); $k++) {
@@ -1432,6 +1490,7 @@ class Parser
 
 			$might_be_img = false;
 
+			wfProfileIn( "$fname-e1" );
 			if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 				$text = $m[2];
 				# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
@@ -1443,27 +1502,33 @@ class Parser
 				# and no image is in sight. See bug 2095.
 				#
 				if( $text !== '' && 
-					preg_match( "/^\](.*)/s", $m[3], $n ) && 
+					substr( $m[3], 0, 1 ) === ']' && 
 					strpos($text, '[') !== false 
 				) 
 				{
 					$text .= ']'; # so that replaceExternalLinks($text) works later
-					$m[3] = $n[1];
+					$m[3] = substr( $m[3], 1 );
 				}
 				# fix up urlencoded title texts
-				if(preg_match('/%/', $m[1] ))
+				if( strpos( $m[1], '%' ) !== false ) {
 					# Should anchors '#' also be rejected?
 					$m[1] = str_replace( array('<', '>'), array('&lt;', '&gt;'), urldecode($m[1]) );
+				}
 				$trail = $m[3];
 			} elseif( preg_match($e1_img, $line, $m) ) { # Invalid, but might be an image with a link in its caption
 				$might_be_img = true;
 				$text = $m[2];
-				if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
+				if ( strpos( $m[1], '%' ) !== false ) {
+				       $m[1] = urldecode($m[1]);
+				}
 				$trail = "";
 			} else { # Invalid form; output directly
 				$s .= $prefix . '[[' . $line ;
+				wfProfileOut( "$fname-e1" );
 				continue;
 			}
+			wfProfileOut( "$fname-e1" );
+			wfProfileIn( "$fname-misc" );
 
 			# Don't allow internal links to pages containing
 			# PROTO: where PROTO is a valid URL protocol; these
@@ -1486,9 +1551,12 @@ class Parser
 				$link = substr($link, 1);
 			}
 
+			wfProfileOut( "$fname-misc" );
+			wfProfileIn( "$fname-title" );
 			$nt = Title::newFromText( $this->unstripNoWiki($link, $this->mStripState) );
 			if( !$nt ) {
 				$s .= $prefix . '[[' . $line;
+				wfProfileOut( "$fname-title" );
 				continue;
 			}
 
@@ -1501,23 +1569,26 @@ class Parser
 
 			$ns = $nt->getNamespace();
 			$iw = $nt->getInterWiki();
-
+			wfProfileOut( "$fname-title" );
+			
 			if ($might_be_img) { # if this is actually an invalid link
+				wfProfileIn( "$fname-might_be_img" );
 				if ($ns == NS_IMAGE && $noforce) { #but might be an image
 					$found = false;
 					while (isset ($a[$k+1]) ) {
 						#look at the next 'line' to see if we can close it there
 						$spliced = array_splice( $a, $k + 1, 1 );
 						$next_line = array_shift( $spliced );
-						if( preg_match("/^(.*?]].*?)]](.*)$/sD", $next_line, $m) ) {
-						# the first ]] closes the inner link, the second the image
+						$m = explode( ']]', $next_line, 3 );
+						if ( count( $m ) == 3 ) {
+							# the first ]] closes the inner link, the second the image
 							$found = true;
-							$text .= '[[' . $m[1];
+							$text .= "[[{$m[0]}]]{$m[1]}";
 							$trail = $m[2];
 							break;
-						} elseif( preg_match("/^.*?]].*$/sD", $next_line, $m) ) {
+						} elseif ( count( $m ) == 2 ) {
 							#if there's exactly one ]] that's fine, we'll keep looking
-							$text .= '[[' . $m[0];
+							$text .= "[[{$m[0]}]]{$m[1]}";
 						} else {
 							#if $next_line is invalid too, we need look no further
 							$text .= '[[' . $next_line;
@@ -1528,31 +1599,36 @@ class Parser
 						# we couldn't find the end of this imageLink, so output it raw
 						#but don't ignore what might be perfectly normal links in the text we've examined
 						$text = $this->replaceInternalLinks($text);
-						$s .= $prefix . '[[' . $link . '|' . $text;
+						$s .= "{$prefix}[[$link|$text";
 						# note: no $trail, because without an end, there *is* no trail
+						wfProfileOut( "$fname-might_be_img" );
 						continue;
 					}
 				} else { #it's not an image, so output it raw
-					$s .= $prefix . '[[' . $link . '|' . $text;
+					$s .= "{$prefix}[[$link|$text";
 					# note: no $trail, because without an end, there *is* no trail
+					wfProfileOut( "$fname-might_be_img" );
 					continue;
 				}
+				wfProfileOut( "$fname-might_be_img" );
 			}
 
 			$wasblank = ( '' == $text );
 			if( $wasblank ) $text = $link;
 
-
 			# Link not escaped by : , create the various objects
 			if( $noforce ) {
 
 				# Interwikis
+				wfProfileIn( "$fname-interwiki" );
 				if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) {
 					$this->mOutput->addLanguageLink( $nt->getFullText() );
 					$s = rtrim($s . "\n");
 					$s .= trim($prefix . $trail, "\n") == '' ? '': $prefix . $trail;
+					wfProfileOut( "$fname-interwiki" );
 					continue;
 				}
+				wfProfileOut( "$fname-interwiki" );
 
 				if ( $ns == NS_IMAGE ) {
 					wfProfileIn( "$fname-image" );
@@ -1642,11 +1718,12 @@ class Parser
 	/**
 	 * Make a link placeholder. The text returned can be later resolved to a real link with
 	 * replaceLinkHolders(). This is done for two reasons: firstly to avoid further
-	 * parsing of interwiki links, and secondly to allow all extistence checks and
+	 * parsing of interwiki links, and secondly to allow all existence checks and
 	 * article length checks (for stub links) to be bundled into a single query.
 	 *
 	 */
 	function makeLinkHolder( &$nt, $text = '', $query = '', $trail = '', $prefix = '' ) {
+		wfProfileIn( __METHOD__ );
 		if ( ! is_object($nt) ) {
 			# Fail gracefully
 			$retVal = "<!-- ERROR -->{$prefix}{$text}{$trail}";
@@ -1668,6 +1745,7 @@ class Parser
 				$retVal = '<!--LINK '. ($nr-1) ."-->{$trail}";
 			}
 		}
+		wfProfileOut( __METHOD__ );
 		return $retVal;
 	}
 
@@ -2355,172 +2433,164 @@ class Parser
 	 *     '{' => array(				# opening parentheses
 	 *					'end' => '}',   # closing parentheses
 	 *					'cb' => array(2 => callback,	# replacement callback to call if {{..}} is found
-	 *								  4 => callback 	# replacement callback to call if {{{{..}}}} is found
+	 *								  3 => callback 	# replacement callback to call if {{{..}}} is found
 	 *								  )
 	 *					)
+	 * 					'min' => 2,     # Minimum parenthesis count in cb
+	 * 					'max' => 3,     # Maximum parenthesis count in cb
 	 * @private
 	 */
 	function replace_callback ($text, $callbacks) {
-		wfProfileIn( __METHOD__ . '-self' );
+		wfProfileIn( __METHOD__ );
 		$openingBraceStack = array();	# this array will hold a stack of parentheses which are not closed yet
 		$lastOpeningBrace = -1;			# last not closed parentheses
 
-		for ($i = 0; $i < strlen($text); $i++) {
-			# check for any opening brace
-			$rule = null;
-			$nextPos = -1;
-			foreach ($callbacks as $key => $value) {
-				$pos = strpos ($text, $key, $i);
-				if (false !== $pos && (-1 == $nextPos || $pos < $nextPos)) {
-					$rule = $value;
-					$nextPos = $pos;
-				}
+		$validOpeningBraces = implode( '', array_keys( $callbacks ) );
+		
+		$i = 0;
+		while ( $i < strlen( $text ) ) {
+			# Find next opening brace, closing brace or pipe
+			if ( $lastOpeningBrace == -1 ) {
+				$currentClosing = '';
+				$search = $validOpeningBraces;
+			} else {
+				$currentClosing = $openingBraceStack[$lastOpeningBrace]['braceEnd'];
+				$search = $validOpeningBraces . '|' . $currentClosing;
 			}
-
-			if ($lastOpeningBrace >= 0) {
-				$pos = strpos ($text, $openingBraceStack[$lastOpeningBrace]['braceEnd'], $i);
-
-				if (false !== $pos && (-1 == $nextPos || $pos < $nextPos)){
-					$rule = null;
-					$nextPos = $pos;
-				}
-
-				$pos = strpos ($text, '|', $i);
-
-				if (false !== $pos && (-1 == $nextPos || $pos < $nextPos)){
-					$rule = null;
-					$nextPos = $pos;
+			$rule = null;
+			$i += strcspn( $text, $search, $i );
+			if ( $i < strlen( $text ) ) {
+				if ( $text[$i] == '|' ) {
+					$found = 'pipe';
+				} elseif ( $text[$i] == $currentClosing ) {
+					$found = 'close';
+				} else {
+					$found = 'open';
+					$rule = $callbacks[$text[$i]];
 				}
-			}
-
-			if ($nextPos == -1)
+			} else {
+				# All done
 				break;
+			}
 
-			$i = $nextPos;
-
-			# found openning brace, lets add it to parentheses stack
-			if (null != $rule) {
+			if ( $found == 'open' ) {
+				# found opening brace, let's add it to parentheses stack
 				$piece = array('brace' => $text[$i],
 							   'braceEnd' => $rule['end'],
-							   'count' => 1,
 							   'title' => '',
 							   'parts' => null);
 
-				# count openning brace characters
-				while ($i+1 < strlen($text) && $text[$i+1] == $piece['brace']) {
-					$piece['count']++;
-					$i++;
-				}
-
-				$piece['startAt'] = $i+1;
-				$piece['partStart'] = $i+1;
+				# count opening brace characters
+				$piece['count'] = strspn( $text, $piece['brace'], $i );
+				$piece['startAt'] = $piece['partStart'] = $i + $piece['count'];
+				$i += $piece['count'];
 
-				# we need to add to stack only if openning brace count is enough for any given rule
-				foreach ($rule['cb'] as $cnt => $fn) {
-					if ($piece['count'] >= $cnt) {
-						$lastOpeningBrace ++;
-						$openingBraceStack[$lastOpeningBrace] = $piece;
-						break;
+				# we need to add to stack only if opening brace count is enough for one of the rules
+				if ( $piece['count'] >= $rule['min'] ) {
+					$lastOpeningBrace ++;
+					$openingBraceStack[$lastOpeningBrace] = $piece;
+				}
+			} elseif ( $found == 'close' ) {
+				# lets check if it is enough characters for closing brace
+				$maxCount = $openingBraceStack[$lastOpeningBrace]['count'];
+				$count = strspn( $text, $text[$i], $i, $maxCount );
+
+				# check for maximum matching characters (if there are 5 closing 
+				# characters, we will probably need only 3 - depending on the rules)
+				$matchingCount = 0;
+				$matchingCallback = null;
+				$cbType = $callbacks[$openingBraceStack[$lastOpeningBrace]['brace']];
+				if ( $count > $cbType['max'] ) {
+					# The specified maximum exists in the callback array, unless the caller 
+					# has made an error
+					$matchingCount = $cbType['max'];
+				} else {
+					# Count is less than the maximum
+					# Skip any gaps in the callback array to find the true largest match
+					# Need to use array_key_exists not isset because the callback can be null
+					$matchingCount = $count;
+					while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $cbType['cb'] ) ) {
+						--$matchingCount;
 					}
 				}
 
-				continue;
-			}
-			else if ($lastOpeningBrace >= 0) {
-				# first check if it is a closing brace
-				if ($openingBraceStack[$lastOpeningBrace]['braceEnd'] == $text[$i]) {
-					# lets check if it is enough characters for closing brace
-					$count = 1;
-					while ($i+$count < strlen($text) && $text[$i+$count] == $text[$i])
-						$count++;
-
-					# if there are more closing parentheses than opening ones, we parse less
-					if ($openingBraceStack[$lastOpeningBrace]['count'] < $count)
-						$count = $openingBraceStack[$lastOpeningBrace]['count'];
-
-					# check for maximum matching characters (if there are 5 closing characters, we will probably need only 3 - depending on the rules)
-					$matchingCount = 0;
-					$matchingCallback = null;
-					foreach ($callbacks[$openingBraceStack[$lastOpeningBrace]['brace']]['cb'] as $cnt => $fn) {
-						if ($count >= $cnt && $matchingCount < $cnt) {
-							$matchingCount = $cnt;
-							$matchingCallback = $fn;
-						}
-					}
+				if ($matchingCount <= 0) {
+					$i += $count;
+					continue;
+				}
+				$matchingCallback = $cbType['cb'][$matchingCount];
 
-					if ($matchingCount == 0) {
-						$i += $count - 1;
-						continue;
-					}
+				# let's set a title or last part (if '|' was found)
+				if (null === $openingBraceStack[$lastOpeningBrace]['parts']) {
+					$openingBraceStack[$lastOpeningBrace]['title'] = 
+						substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], 
+						$i - $openingBraceStack[$lastOpeningBrace]['partStart']);
+				} else {
+					$openingBraceStack[$lastOpeningBrace]['parts'][] = 
+						substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], 
+						$i - $openingBraceStack[$lastOpeningBrace]['partStart']);
+				}
 
-					# lets set a title or last part (if '|' was found)
-					if (null === $openingBraceStack[$lastOpeningBrace]['parts'])
-						$openingBraceStack[$lastOpeningBrace]['title'] = substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], $i - $openingBraceStack[$lastOpeningBrace]['partStart']);
-					else
-						$openingBraceStack[$lastOpeningBrace]['parts'][] = substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], $i - $openingBraceStack[$lastOpeningBrace]['partStart']);
-
-					$pieceStart = $openingBraceStack[$lastOpeningBrace]['startAt'] - $matchingCount;
-					$pieceEnd = $i + $matchingCount;
-
-					if( is_callable( $matchingCallback ) ) {
-						$cbArgs = array (
-										 'text' => substr($text, $pieceStart, $pieceEnd - $pieceStart),
-										 'title' => trim($openingBraceStack[$lastOpeningBrace]['title']),
-										 'parts' => $openingBraceStack[$lastOpeningBrace]['parts'],
-										 'lineStart' => (($pieceStart > 0) && ($text[$pieceStart-1] == "\n")),
-										 );
-						# finally we can call a user callback and replace piece of text
-						wfProfileOut( __METHOD__ . '-self' );
-						$replaceWith = call_user_func( $matchingCallback, $cbArgs );
-						wfProfileIn( __METHOD__ . '-self' );
-						$text = substr($text, 0, $pieceStart) . $replaceWith . substr($text, $pieceEnd);
-						$i = $pieceStart + strlen($replaceWith) - 1;
-					}
-					else {
-						# null value for callback means that parentheses should be parsed, but not replaced
-						$i += $matchingCount - 1;
-					}
+				$pieceStart = $openingBraceStack[$lastOpeningBrace]['startAt'] - $matchingCount;
+				$pieceEnd = $i + $matchingCount;
+
+				if( is_callable( $matchingCallback ) ) {
+					$cbArgs = array (
+									 'text' => substr($text, $pieceStart, $pieceEnd - $pieceStart),
+									 'title' => trim($openingBraceStack[$lastOpeningBrace]['title']),
+									 'parts' => $openingBraceStack[$lastOpeningBrace]['parts'],
+									 'lineStart' => (($pieceStart > 0) && ($text[$pieceStart-1] == "\n")),
+									 );
+					# finally we can call a user callback and replace piece of text
+					$replaceWith = call_user_func( $matchingCallback, $cbArgs );
+					$text = substr($text, 0, $pieceStart) . $replaceWith . substr($text, $pieceEnd);
+					$i = $pieceStart + strlen($replaceWith);
+				} else {
+					# null value for callback means that parentheses should be parsed, but not replaced
+					$i += $matchingCount;
+				}
 
-					# reset last openning parentheses, but keep it in case there are unused characters
-					$piece = array('brace' => $openingBraceStack[$lastOpeningBrace]['brace'],
-								   'braceEnd' => $openingBraceStack[$lastOpeningBrace]['braceEnd'],
-								   'count' => $openingBraceStack[$lastOpeningBrace]['count'],
-								   'title' => '',
-								   'parts' => null,
-								   'startAt' => $openingBraceStack[$lastOpeningBrace]['startAt']);
-					$openingBraceStack[$lastOpeningBrace--] = null;
-
-					if ($matchingCount < $piece['count']) {
-						$piece['count'] -= $matchingCount;
-						$piece['startAt'] -= $matchingCount;
-						$piece['partStart'] = $piece['startAt'];
-						# do we still qualify for any callback with remaining count?
-						foreach ($callbacks[$piece['brace']]['cb'] as $cnt => $fn) {
-							if ($piece['count'] >= $cnt) {
-								$lastOpeningBrace ++;
-								$openingBraceStack[$lastOpeningBrace] = $piece;
-								break;
-							}
+				# reset last opening parentheses, but keep it in case there are unused characters
+				$piece = array('brace' => $openingBraceStack[$lastOpeningBrace]['brace'],
+							   'braceEnd' => $openingBraceStack[$lastOpeningBrace]['braceEnd'],
+							   'count' => $openingBraceStack[$lastOpeningBrace]['count'],
+							   'title' => '',
+							   'parts' => null,
+							   'startAt' => $openingBraceStack[$lastOpeningBrace]['startAt']);
+				$openingBraceStack[$lastOpeningBrace--] = null;
+
+				if ($matchingCount < $piece['count']) {
+					$piece['count'] -= $matchingCount;
+					$piece['startAt'] -= $matchingCount;
+					$piece['partStart'] = $piece['startAt'];
+					# do we still qualify for any callback with remaining count?
+					$currentCbList = $callbacks[$piece['brace']]['cb'];
+					while ( $piece['count'] ) {
+						if ( array_key_exists( $piece['count'], $currentCbList ) ) {
+							$lastOpeningBrace++;
+							$openingBraceStack[$lastOpeningBrace] = $piece;
+							break;
 						}
+						--$piece['count'];
 					}
-					continue;
 				}
-
+			} elseif ( $found == 'pipe' ) {
 				# lets set a title if it is a first separator, or next part otherwise
-				if ($text[$i] == '|') {
-					if (null === $openingBraceStack[$lastOpeningBrace]['parts']) {
-						$openingBraceStack[$lastOpeningBrace]['title'] = substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], $i - $openingBraceStack[$lastOpeningBrace]['partStart']);
-						$openingBraceStack[$lastOpeningBrace]['parts'] = array();
-					}
-					else
-						$openingBraceStack[$lastOpeningBrace]['parts'][] = substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], $i - $openingBraceStack[$lastOpeningBrace]['partStart']);
-
-					$openingBraceStack[$lastOpeningBrace]['partStart'] = $i + 1;
+				if (null === $openingBraceStack[$lastOpeningBrace]['parts']) {
+					$openingBraceStack[$lastOpeningBrace]['title'] = 
+						substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], 
+						$i - $openingBraceStack[$lastOpeningBrace]['partStart']);
+					$openingBraceStack[$lastOpeningBrace]['parts'] = array();
+				} else {
+					$openingBraceStack[$lastOpeningBrace]['parts'][] = 
+						substr($text, $openingBraceStack[$lastOpeningBrace]['partStart'], 
+						$i - $openingBraceStack[$lastOpeningBrace]['partStart']);
 				}
+				$openingBraceStack[$lastOpeningBrace]['partStart'] = ++$i;
 			}
 		}
 
-		wfProfileOut( __METHOD__ . '-self' );
+		wfProfileOut( __METHOD__ );
 		return $text;
 	}
 
@@ -2545,7 +2615,7 @@ class Parser
 			return $text;
 		}
 
-		$fname = 'Parser::replaceVariables';
+		$fname = __METHOD__ /*. '-L' . count( $this->mArgStack )*/;
 		wfProfileIn( $fname );
 
 		# This function is called recursively. To keep track of arguments we need a stack:
@@ -2558,13 +2628,25 @@ class Parser
 		if ( $this->mOutputType == OT_HTML || $this->mOutputType == OT_WIKI ) {
 			$braceCallbacks[3] = array( &$this, 'argSubstitution' );
 		}
-		$callbacks = array();
-		$callbacks['{'] = array('end' => '}', 'cb' => $braceCallbacks);
-		$callbacks['['] = array('end' => ']', 'cb' => array(2=>null));
-		$text = $this->replace_callback ($text, $callbacks);
-
-		array_pop( $this->mArgStack );
+		if ( $braceCallbacks ) {
+			$callbacks = array( 
+				'{' => array(
+					'end' => '}',
+					'cb' => $braceCallbacks,
+					'min' => $argsOnly ? 3 : 2,
+					'max' => isset( $braceCallbacks[3] ) ? 3 : 2,
+				),
+				'[' => array( 
+					'end' => ']', 
+					'cb' => array(2=>null),
+					'min' => 2,
+					'max' => 2,
+				)
+			);
+			$text = $this->replace_callback ($text, $callbacks);
 
+			array_pop( $this->mArgStack );
+		}
 		wfProfileOut( $fname );
 		return $text;
 	}
@@ -2636,8 +2718,9 @@ class Parser
 	 */
 	function braceSubstitution( $piece ) {
 		global $wgContLang, $wgLang, $wgAllowDisplayTitle, $action;
-		$fname = 'Parser::braceSubstitution';
+		$fname = __METHOD__ /*. '-L' . count( $this->mArgStack )*/;
 		wfProfileIn( $fname );
+		wfProfileIn( __METHOD__.'-setup' );
 
 		# Flags
 		$found = false;             # $text has been filled
@@ -2671,8 +2754,10 @@ class Parser
 
 		$args = (null == $piece['parts']) ? array() : $piece['parts'];
 		$argc = count( $args );
+		wfProfileOut( __METHOD__.'-setup' );
 
 		# SUBST
+		wfProfileIn( __METHOD__.'-modifiers' );
 		if ( !$found ) {
 			$mwSubst =& MagicWord::get( 'subst' );
 			if ( $mwSubst->matchStartAndRemove( $part1 ) xor ($this->mOutputType == OT_WIKI) ) {
@@ -2714,6 +2799,7 @@ class Parser
 				}
 			}
 		}
+		wfProfileOut( __METHOD__.'-modifiers' );
 
 		# Parser functions
 		if ( !$found ) {
@@ -2776,7 +2862,7 @@ class Parser
 				$text = $linestart .
 					'{{' . $part1 . '}}' .
 					'<!-- WARNING: template loop detected -->';
-				wfDebug( "$fname: template loop broken at '$part1'\n" );
+				wfDebug( __METHOD__.": template loop broken at '$part1'\n" );
 			} else {
 				# set $text to cached message.
 				$text = $linestart . $this->mTemplates[$piece['title']];
@@ -3374,137 +3460,6 @@ class Parser
 		}
 	}
 
-	/**
-	 * Return an HTML link for the "ISBN 123456" text
-	 * @private
-	 */
-	function magicISBN( $text ) {
-		$fname = 'Parser::magicISBN';
-		wfProfileIn( $fname );
-
-		$a = split( 'ISBN ', ' '.$text );
-		if ( count ( $a ) < 2 ) {
-			wfProfileOut( $fname );
-			return $text;
-		}
-		$text = substr( array_shift( $a ), 1);
-		$valid = '0123456789-Xx';
-
-		foreach ( $a as $x ) {
-			# hack: don't replace inside thumbnail title/alt
-			# attributes
-			if(preg_match('/<[^>]+(alt|title)="[^">]*$/', $text)) {
-				$text .= "ISBN $x";
-				continue;
-			}
-
-			$isbn = $blank = '' ;
-			while ( $x !== '' && ' ' == $x{0} ) {
-				$blank .= ' ';
-				$x = substr( $x, 1 );
-			}
-			if ( $x == '' ) { # blank isbn
-				$text .= "ISBN $blank";
-				continue;
-			}
-			while ( strstr( $valid, $x{0} ) != false ) {
-				$isbn .= $x{0};
-				$x = substr( $x, 1 );
-			}
-			$num = str_replace( '-', '', $isbn );
-			$num = str_replace( ' ', '', $num );
-			$num = str_replace( 'x', 'X', $num );
-
-			if ( '' == $num ) {
-				$text .= "ISBN $blank$x";
-			} else {
-				$titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
-				$text .= '<a href="' .
-					$titleObj->escapeLocalUrl( 'isbn='.$num ) .
-					"\" class=\"internal\">ISBN $isbn</a>";
-				$text .= $x;
-			}
-		}
-		wfProfileOut( $fname );
-		return $text;
-	}
-
-	/**
-	 * Return an HTML link for the "RFC 1234" text
-	 *
-	 * @private
-	 * @param string $text     Text to be processed
-	 * @param string $keyword  Magic keyword to use (default RFC)
-	 * @param string $urlmsg   Interface message to use (default rfcurl)
-	 * @return string
-	 */
-	function magicRFC( $text, $keyword='RFC ', $urlmsg='rfcurl'  ) {
-
-		$valid = '0123456789';
-		$internal = false;
-
-		$a = split( $keyword, ' '.$text );
-		if ( count ( $a ) < 2 ) {
-			return $text;
-		}
-		$text = substr( array_shift( $a ), 1);
-
-		/* Check if keyword is preceed by [[.
-		 * This test is made here cause of the array_shift above
-		 * that prevent the test to be done in the foreach.
-		 */
-		if ( substr( $text, -2 ) == '[[' ) {
-			$internal = true;
-		}
-
-		foreach ( $a as $x ) {
-			/* token might be empty if we have RFC RFC 1234 */
-			if ( $x=='' ) {
-				$text.=$keyword;
-				continue;
-				}
-
-			# hack: don't replace inside thumbnail title/alt
-			# attributes
-			if(preg_match('/<[^>]+(alt|title)="[^">]*$/', $text)) {
-				$text .= $keyword . $x;
-				continue;
-			}
-
-			$id = $blank = '' ;
-
-			/** remove and save whitespaces in $blank */
-			while ( $x{0} == ' ' ) {
-				$blank .= ' ';
-				$x = substr( $x, 1 );
-			}
-
-			/** remove and save the rfc number in $id */
-			while ( strstr( $valid, $x{0} ) != false ) {
-				$id .= $x{0};
-				$x = substr( $x, 1 );
-			}
-
-			if ( $id == '' ) {
-				/* call back stripped spaces*/
-				$text .= $keyword.$blank.$x;
-			} elseif( $internal ) {
-				/* normal link */
-				$text .= $keyword.$id.$x;
-			} else {
-				/* build the external link*/
-				$url = wfMsg( $urlmsg, $id);
-				$sk =& $this->mOptions->getSkin();
-				$la = $sk->getExternalLinkAttributes( $url, $keyword.$id );
-				$text .= "<a href=\"{$url}\"{$la}>{$keyword}{$id}</a>{$x}";
-			}
-
-			/* Check if the next RFC keyword is preceed by [[ */
-			$internal = ( substr($x,-2) == '[[' );
-		}
-		return $text;
-	}
-
 	/**
 	 * Transform wiki markup when saving a page by doing \r\n -> \n
 	 * conversion, substitting signatures, {{subst:}} templates, etc.
diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php
index a4ab66229a..185679f6cd 100644
--- a/includes/Sanitizer.php
+++ b/includes/Sanitizer.php
@@ -329,75 +329,88 @@ class Sanitizer {
 	 */
 	static function removeHTMLtags( $text, $processCallback = null, $args = array() ) {
 		global $wgUseTidy, $wgUserHtml;
-		$fname = 'Parser::removeHTMLtags';
-		wfProfileIn( $fname );
-
-		if( $wgUserHtml ) {
-			$htmlpairs = array( # Tags that must be closed
-				'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
-				'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
-				'strike', 'strong', 'tt', 'var', 'div', 'center',
-				'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
-				'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'u'
-			);
-			$htmlsingle = array(
-				'br', 'hr', 'li', 'dt', 'dd'
-			);
-			$htmlsingleonly = array( # Elements that cannot have close tags
-				'br', 'hr'
-			);
-			$htmlnest = array( # Tags that can be nested--??
-				'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
-				'dl', 'font', 'big', 'small', 'sub', 'sup', 'span'
-			);
-			$tabletags = array( # Can only appear inside table
-				'td', 'th', 'tr',
-			);
-			$htmllist = array( # Tags used by list
-				'ul','ol',
-			);
-			$listtags = array( # Tags that can appear in a list
-				'li',
-			);
 
-		} else {
-			$htmlpairs = array();
-			$htmlsingle = array();
-			$htmlnest = array();
-			$tabletags = array();
-		}
+		static $htmlpairs, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags, 
+			$htmllist, $listtags, $htmlsingleallowed, $htmlelements, $staticInitialised;
+		
+		wfProfileIn( __METHOD__ );
+		
+		if ( !$staticInitialised ) {
+			if( $wgUserHtml ) {
+				$htmlpairs = array( # Tags that must be closed
+					'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
+					'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
+					'strike', 'strong', 'tt', 'var', 'div', 'center',
+					'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
+					'ruby', 'rt' , 'rb' , 'rp', 'p', 'span'
+				);
+				$htmlsingle = array(
+					'br', 'hr', 'li', 'dt', 'dd'
+				);
+				$htmlsingleonly = array( # Elements that cannot have close tags
+					'br', 'hr'
+				);
+				$htmlnest = array( # Tags that can be nested--??
+					'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
+					'dl', 'font', 'big', 'small', 'sub', 'sup', 'span'
+				);
+				$tabletags = array( # Can only appear inside table
+					'td', 'th', 'tr',
+				);
+				$htmllist = array( # Tags used by list
+					'ul','ol',
+				);
+				$listtags = array( # Tags that can appear in a list
+					'li',
+				);
+
+			} else {
+				# Raw HTML is disabled: no tag is allowed. Every list must still be
+				# an array, since all of them are passed to array_flip() below.
+				$htmlpairs = $htmlsingle = $htmlsingleonly = $htmlnest = array();
+				$tabletags = $htmllist = $listtags = array();
+			}
+
+			$htmlsingleallowed = array_merge( $htmlsingle, $tabletags );
+			$htmlelements = array_merge( $htmlsingle, $htmlpairs, $htmlnest );
 
-		$htmlsingleallowed = array_merge( $htmlsingle, $tabletags );
-		$htmlelements = array_merge( $htmlsingle, $htmlpairs, $htmlnest );
+			# Convert them all to hashtables for faster lookup
+			$vars = array( 'htmlpairs', 'htmlsingle', 'htmlsingleonly', 'htmlnest', 'tabletags', 
+				'htmllist', 'listtags', 'htmlsingleallowed', 'htmlelements' );
+			foreach ( $vars as $var ) {
+				$$var = array_flip( $$var );
+			}
+			$staticInitialised = true;
+		}
 
 		# Remove HTML comments
 		$text = Sanitizer::removeHTMLcomments( $text );
 		$bits = explode( '<', $text );
 		$text = array_shift( $bits );
 		if(!$wgUseTidy) {
-			$tagstack = array(); $tablestack = array();
+			$tagstack = $tablestack = array();
 			foreach ( $bits as $x ) {
 				$prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
-				preg_match( '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/',
-				$x, $regs );
+				preg_match( '!^(/?)(\\w+)([^>]*?)(/{0,1}>)([^<]*)$!', $x, $regs );
 				list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
 				error_reporting( $prev );
 
 				$badtag = 0 ;
-				if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
+				if ( isset( $htmlelements[$t = strtolower( $t )] ) ) {
 					# Check our stack
 					if ( $slash ) {
 						# Closing a tag...
-						if( in_array( $t, $htmlsingleonly ) ) {
+						if( isset( $htmlsingleonly[$t] ) ) {
 							$badtag = 1;
 						} elseif ( ( $ot = @array_pop( $tagstack ) ) != $t ) {
-							if ( in_array($ot, $htmlsingleallowed) ) {
+							if ( isset( $htmlsingleallowed[$ot] ) ) {
 								# Pop all elements with an optional close tag
 								# and see if we find a match below them
 								$optstack = array();
 								array_push ($optstack, $ot);
 								while ( ( ( $ot = @array_pop( $tagstack ) ) != $t ) &&
-												in_array($ot, $htmlsingleallowed) ) {
+										isset( $htmlsingleallowed[$ot] ) ) 
+								{
 									array_push ($optstack, $ot);
 								}
 								if ( $t != $ot ) {
@@ -410,7 +423,7 @@ class Sanitizer {
 							} else {
 								@array_push( $tagstack, $ot );
 								# <li> can be nested in <ul> or <ol>, skip those cases:
-								if(!(in_array($ot, $htmllist) && in_array($t, $listtags) )) {
+								if(!(isset( $htmllist[$ot] ) && isset( $listtags[$t] ) )) {
 									$badtag = 1;
 								}
 							}
@@ -422,20 +435,20 @@ class Sanitizer {
 						$newparams = '';
 					} else {
 						# Keep track for later
-						if ( in_array( $t, $tabletags ) &&
+						if ( isset( $tabletags[$t] ) &&
 						! in_array( 'table', $tagstack ) ) {
 							$badtag = 1;
 						} else if ( in_array( $t, $tagstack ) &&
-						! in_array ( $t , $htmlnest ) ) {
+						! isset( $htmlnest [$t ] ) ) {
 							$badtag = 1 ;
 						#Â Is it a self closed htmlpair ? (bug 5487)
 						} else if( $brace == '/>' &&
-						in_array($t, $htmlpairs) ) {
+						isset( $htmlpairs[$t] ) ) {
 							$badtag = 1;
-						} elseif( in_array( $t, $htmlsingleonly ) ) {
+						} elseif( isset( $htmlsingleonly[$t] ) ) {
 							# Hack to force empty tag for uncloseable elements
 							$brace = '/>';
-						} else if( in_array( $t, $htmlsingle ) ) {
+						} else if( isset( $htmlsingle[$t] ) ) {
 							# Hack to not close $htmlsingle tags
 							$brace = NULL;
 						} else {
@@ -475,7 +488,7 @@ class Sanitizer {
 				preg_match( '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/',
 				$x, $regs );
 				@list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
-				if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
+				if ( isset( $htmlelements[$t = strtolower( $t )] ) ) {
 					if( is_callable( $processCallback ) ) {
 						call_user_func_array( $processCallback, array( &$params, $args ) );
 					}
@@ -487,7 +500,7 @@ class Sanitizer {
 				}
 			}
 		}
-		wfProfileOut( $fname );
+		wfProfileOut( __METHOD__ );
 		return $text;
 	}
 
@@ -502,8 +515,7 @@ class Sanitizer {
 	 * @return string
 	 */
 	static function removeHTMLcomments( $text ) {
-		$fname='Parser::removeHTMLcomments';
-		wfProfileIn( $fname );
+		wfProfileIn( __METHOD__ );
 		while (($start = strpos($text, '<!--')) !== false) {
 			$end = strpos($text, '-->', $start + 4);
 			if ($end === false) {
@@ -533,7 +545,7 @@ class Sanitizer {
 				$text = substr_replace($text, '', $start, $end - $start);
 			}
 		}
-		wfProfileOut( $fname );
+		wfProfileOut( __METHOD__ );
 		return $text;
 	}
 
diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt
index cce844ecec..e4f1971f69 100644
--- a/maintenance/parserTests.txt
+++ b/maintenance/parserTests.txt
@@ -2808,7 +2808,7 @@ Thumbnail image caption with a free URL
 !! end
 
 !! test
-TODO: BUG 1887: A ISBN with a thumbnail
+BUG 1887: A ISBN with a thumbnail
 !! input
 [[Image:foobar.jpg|thumb|ISBN 12354]]
 !! result
-- 
2.20.1